import ast
import json
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import torchvision.transforms as transforms
from PIL import Image
from tqdm import tqdm
# Confidence threshold later used to filter Megadetector bounding boxes.
conf_threshold = 0.6
# Global switches for persisting plots / csv output.
save_plots = False
save_csv_file = False
# Ground-truth labels per image id (index 'id' = image name without '.jpg').
train_features_label = pd.read_csv("../eda/train_features_label.csv", index_col='id')
# Raw Megadetector output JSON for the training images.
# NOTE(review): open() here leaks the file handle; see read_megadetector_json below.
mega_json = json.load(open('train_features_output.json'))
mega_json.keys()
dict_keys(['images', 'detection_categories', 'info'])
# Mapping from Megadetector category id ('1'/'2'/'3') to readable name.
dict_detection_cat = mega_json['detection_categories']
dict_detection_cat
{'1': 'animal', '2': 'person', '3': 'vehicle'}
# Detector metadata (model version, recommended thresholds).
mega_detector_info = mega_json['info']
mega_detector_info
{'detection_completion_time': '2023-04-01 11:21:59',
'format_version': '1.2',
'detector': 'md_v5a.0.0.pt',
'detector_metadata': {'megadetector_version': 'v5a.0.0',
'typical_detection_threshold': 0.2,
'conservative_detection_threshold': 0.05}}
# One row per image: file, max_detection_conf, detections (list of dicts).
df_images = pd.DataFrame(mega_json["images"])
df_images = df_images.reset_index(drop=True)
df_images
| file | max_detection_conf | detections | |
|---|---|---|---|
| 0 | ZJ000000.jpg | 0.690 | [{'category': '1', 'conf': 0.00851, 'bbox': [0... |
| 1 | ZJ000001.jpg | 0.813 | [{'category': '1', 'conf': 0.813, 'bbox': [0.5... |
| 2 | ZJ000002.jpg | 0.612 | [{'category': '1', 'conf': 0.00643, 'bbox': [0... |
| 3 | ZJ000003.jpg | 0.686 | [{'category': '1', 'conf': 0.00796, 'bbox': [0... |
| 4 | ZJ000004.jpg | 0.476 | [{'category': '1', 'conf': 0.476, 'bbox': [0.5... |
| ... | ... | ... | ... |
| 16483 | ZJ016483.jpg | 0.000 | [] |
| 16484 | ZJ016484.jpg | 0.528 | [{'category': '1', 'conf': 0.00831, 'bbox': [0... |
| 16485 | ZJ016485.jpg | 0.151 | [{'category': '1', 'conf': 0.151, 'bbox': [0.4... |
| 16486 | ZJ016486.jpg | 0.264 | [{'category': '1', 'conf': 0.0139, 'bbox': [0.... |
| 16487 | ZJ016487.jpg | 0.896 | [{'category': '1', 'conf': 0.0178, 'bbox': [0.... |
16488 rows × 3 columns
# several bbox per image possible — each detection dict carries
# category, conf and a normalized [x, y, w, h] bbox
df_images['detections'][0]
[{'category': '1', 'conf': 0.00851, 'bbox': [0.9229, 0.8129, 0.07708, 0.1555]},
{'category': '1', 'conf': 0.69, 'bbox': [0.1093, 0.5888, 0.0802, 0.1851]}]
def read_megadetector_json(path_json:str = 'train_features_output.json'):
    '''
    Load a Megadetector output JSON and return its "images" entries.

    path_json: path to the Megadetector JSON file
    Returns a DataFrame with one row per image (columns: file,
    max_detection_conf, detections) and a fresh RangeIndex.
    '''
    # Context manager closes the file handle deterministically
    # (the original bare open() leaked it).
    with open(path_json) as json_file:
        mega_json = json.load(json_file)
    df_images = pd.DataFrame(mega_json["images"])
    return df_images.reset_index(drop=True)
def transform_bbox(image=None, normalized_bbox=None, image_size=None, shrink_px=4):
    '''
    Convert a normalized Megadetector bbox into absolute pixel coordinates.

    image: PIL image whose .size supplies (width, height); takes precedence
    normalized_bbox: [x, y, w, h] as fractions of the image size
    image_size: square edge length in px, used when image is None
    shrink_px: margin removed from each side of the box (default 4) —
        presumably to cut off the bbox outline drawn into the detection
        images; TODO confirm
    Returns (y, x, height, width) in pixels — note the swapped order.
    '''
    if image is not None:
        image_width, image_height = image.size
    else:
        # exactly one of image / image_size must be given
        image_width, image_height = image_size, image_size
    x, y, width, height = normalized_bbox
    # shift the corner inwards by the margin and shrink the extent by the
    # margin on both sides
    x = int(np.round(x * image_width, 0)) + shrink_px
    y = int(np.round(y * image_height, 0)) + shrink_px
    width = int(np.round(width * image_width, 0)) - 2 * shrink_px
    height = int(np.round(height * image_height, 0)) - 2 * shrink_px
    return y, x, height, width
def get_correct_box(df, train_data):
    '''
    Add pixel bounding-box columns to df.

    df: frame indexed by image name with columns 'file', 'bbox_true' and
        'bbox_normalized' (as produced by get_clean_dataframe_from_json)
    train_data: True -> read images from train_features/, else test_features/
    Adds 'bbox' (pixels w.r.t. the real image size) and 'bbox_im_size_224'
    (pixels w.r.t. a 224x224 image); rows without a detection get pd.NA.
    Returns the mutated df.
    '''
    bbox_transformed = []
    bbox_transformed_im_size_224 = []
    image_size = 224
    if train_data:
        data_path = r"../competition_data/train_features/"
    else:
        data_path = r"../competition_data/test_features/"
    for image_name in df.index:
        row = df.loc[image_name]  # avoid three separate .loc lookups per image
        if row["bbox_true"]:
            path = data_path + row["file"]
            # context manager releases the underlying file handle
            # (the original Image.open leaked it)
            with Image.open(path) as img:
                image = img.convert("RGB")
            normalized_bbox = row['bbox_normalized']
            bbox_transformed.append(transform_bbox(image=image, normalized_bbox=normalized_bbox))
            bbox_transformed_im_size_224.append(transform_bbox(normalized_bbox=normalized_bbox, image_size=image_size))
        else:
            bbox_transformed.append(pd.NA)
            bbox_transformed_im_size_224.append(pd.NA)
    df['bbox'] = bbox_transformed
    df['bbox_im_size_224'] = bbox_transformed_im_size_224
    return df
def get_clean_dataframe_from_json(threshold:float=0.6, save_csv=False, filter_th=True, train_data=True):
    '''
    Build one tidy row per image from the Megadetector JSON.

    threshold: confidence threshold, applied only when filter_th is True
    save_csv: write the result to a threshold-named csv file
    filter_th: filters bbox below threshold if true
    train_data: forwarded to get_correct_box to pick the image folder

    Keeps, per image, only the detection whose conf equals
    max_detection_conf, then left-joins back so images without any
    detection stay in the frame (with NA values). The result is indexed
    by image name without the '.jpg' suffix.
    '''
    df_images = read_megadetector_json()
    # one column per detection dict (an image can have several detections)
    df_images_detections = df_images['detections'].apply(pd.Series, dtype='object')
    #display(df_images_detections.head(2))
    df_images_clean = df_images.merge(df_images_detections, left_index=True, right_index=True).drop(columns='detections')
    #display(df_images_clean.head(2))
    # long format: one row per (image, detection slot)
    df_images_clean = df_images_clean.melt(id_vars=['file', 'max_detection_conf']).sort_values('file')
    #display(df_images_clean)
    # remove nan values (empty detection slots)
    df_images_clean = df_images_clean.dropna(subset='value')
    df_images_clean = df_images_clean.drop(columns='variable')
    # expand category, conf, bbox from the detection dicts
    df_cat_conf_bbox = df_images_clean['value'].apply(pd.Series, dtype='object')
    df_images_clean = df_images_clean.merge(df_cat_conf_bbox, left_index=True, right_index=True).drop(columns='value')
    # take only detections for max detection conf
    df_images_clean = df_images_clean[df_images_clean['max_detection_conf'] == df_images_clean['conf']]
    df_images_clean = df_images_clean.reset_index(drop=True)
    # leftjoin clean data so images without any detection are kept (NA rows)
    df_images_clean = pd.merge(df_images['file'], df_images_clean, on='file', how='left')
    # cast datatypes; dict_detection_cat maps '1'/'2'/'3' to readable names
    df_images_clean['category'] = df_images_clean['category'].astype('category')
    df_images_clean['category'] = df_images_clean['category'].replace(dict_detection_cat)
    # change index to image name (file name without '.jpg')
    df_images_clean['image_name'] = [image.replace('.jpg', '') for image in df_images_clean['file']]
    df_images_clean.index = df_images_clean['image_name']
    df_images_clean = df_images_clean.drop(columns='image_name')
    # remove duplicates — ties on max_detection_conf produce duplicate rows;
    # NOTE(review): the idiomatic form is ~df.index.duplicated()
    df_images_clean = df_images_clean[df_images_clean.index.duplicated() == False]
    # bbox: keep the normalized box and flag whether one exists
    df_images_clean['bbox_normalized'] = df_images_clean['bbox']
    df_images_clean['bbox_true'] = df_images_clean['bbox_normalized'].notnull()
    # transform bbox to pixel coordinates (real image size and 224x224)
    df_images_clean = get_correct_box(df_images_clean, train_data)
    if filter_th:
        df_images_clean = df_images_clean[df_images_clean['max_detection_conf'] > threshold]
    if save_csv: df_images_clean.to_csv(f'megadetector_image_detection_bbox_th{str(threshold).replace(".", "")}.csv')
    return df_images_clean
# Check the quality of the bounding boxes for all confidence levels with filter_th=False.
df_images_clean = get_clean_dataframe_from_json(0, save_csv=False, filter_th=False)
# notebook cell echo of the resulting frame
df_images_clean
| file | max_detection_conf | category | conf | bbox | bbox_normalized | bbox_true | bbox_im_size_224 | |
|---|---|---|---|---|---|---|---|---|
| image_name | ||||||||
| ZJ000000 | ZJ000000.jpg | 0.690 | animal | 0.690 | (322, 109, 92, 69) | [0.1093, 0.5888, 0.0802, 0.1851] | True | (136, 28, 33, 10) |
| ZJ000001 | ZJ000001.jpg | 0.813 | animal | 0.813 | (4, 542, 515, 120) | [0.5604, 0, 0.1333, 0.9685] | True | (4, 130, 209, 22) |
| ZJ000002 | ZJ000002.jpg | 0.612 | animal | 0.612 | (284, 330, 56, 74) | [0.5093, 0.7777, 0.1281, 0.1777] | True | (178, 118, 32, 21) |
| ZJ000003 | ZJ000003.jpg | 0.686 | animal | 0.686 | (230, 4, 107, 129) | [0, 0.6277, 0.214, 0.3194] | True | (145, 4, 64, 40) |
| ZJ000004 | ZJ000004.jpg | 0.476 | animal | 0.476 | (194, 326, 91, 66) | [0.5031, 0.5671, 0.1156, 0.2955] | True | (131, 117, 58, 18) |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| ZJ016483 | ZJ016483.jpg | NaN | NaN | NaN | <NA> | NaN | False | <NA> |
| ZJ016484 | ZJ016484.jpg | 0.528 | animal | 0.528 | (4, 399, 335, 237) | [0.6171, 0, 0.3828, 0.9527] | True | (4, 142, 205, 78) |
| ZJ016485 | ZJ016485.jpg | 0.151 | animal | 0.151 | (291, 279, 49, 129) | [0.4296, 0.7972, 0.214, 0.1583] | True | (183, 100, 27, 40) |
| ZJ016486 | ZJ016486.jpg | 0.264 | animal | 0.264 | (302, 124, 46, 85) | [0.125, 0.5518, 0.09687, 0.1] | True | (128, 32, 14, 14) |
| ZJ016487 | ZJ016487.jpg | 0.896 | animal | 0.896 | (184, 431, 142, 205) | [0.6671, 0.4999, 0.3328, 0.4166] | True | (116, 153, 85, 67) |
16488 rows × 8 columns
Das Modell des Megadetectors unterscheidet zwischen drei Kategorien: Animal, Person, Vehicle
# unique detected categories (NaN for images without any detection)
df_images_clean['category'].unique().to_list()
['animal', nan, 'person', 'vehicle']
figsize = (8, 4)
# bar plot: how many images fall into each detected category
figure = plt.figure(figsize=figsize)
df_images_clean['category'].value_counts().plot.bar()
# count of images without any category (NA rows)
cat_na = df_images_clean.shape[0]- df_images_clean['category'].value_counts().sum()
plt.title(f'Verteilung der detektierten Kategorien, na:{cat_na}')
plt.ylabel('Count')
plt.tight_layout()
if save_plots: plt.savefig('./plots/dist_detection_classes.png')
plt.show()
# confidence histogram for category 'animal'
figure = plt.figure(figsize=figsize)
df_images_clean[df_images_clean['category'] == 'animal']['max_detection_conf'].plot.hist()
plt.title('Verteilung von max_detection_conf für Kategorie 1 (Animal)')
plt.xlabel('max_detection_conf')
plt.tight_layout()
if save_plots: plt.savefig('./plots/dist_conf_animal.png')
plt.show()
# confidence histogram for category 'person'
figure = plt.figure(figsize=figsize)
df_images_clean[df_images_clean['category'] == 'person']['max_detection_conf'].plot.hist()
plt.title('Verteilung von max_detection_conf für Kategorie 2 (Person)')
plt.xlabel('max_detection_conf')
plt.tight_layout()
if save_plots: plt.savefig('./plots/dist_conf_person.png')
plt.show()
# confidence histogram for category 'vehicle'
figure = plt.figure(figsize=figsize)
df_images_clean[df_images_clean['category'] == 'vehicle']['max_detection_conf'].plot.hist()
plt.title('Verteilung von max_detection_conf für Kategorie 3 (vehicle)')
plt.xlabel('max_detection_conf')
plt.tight_layout()
if save_plots: plt.savefig('./plots/dist_conf_vehicle.png')
plt.show()
person und vehicle
Bilder aus den Megadetector-Vorhersagen
def _image_names_for_category(detected_category):
    # Image ids (file name without '.jpg') of all rows the Megadetector
    # assigned to the given category.
    files = df_images_clean[df_images_clean['category'] == detected_category]['file']
    files = files.reset_index(drop=True)
    return [image_name.replace('.jpg', '') for image_name in files]

# One id list per Megadetector category (the original repeated the same
# three-line recipe three times).
lst_img_cat1 = _image_names_for_category('animal')
lst_img_cat2 = _image_names_for_category('person')
lst_img_cat3 = _image_names_for_category('vehicle')
# root folder with the original competition images
path_img_train = '../competition_data/'
# folder with Megadetector output images — presumably with the bbox drawn
# in at threshold 0.1, judging by folder/file naming; TODO confirm
path_img_mega = './train_features_detection_th01/'
Random Ansicht person
# Show six random 'person' detections together with their ground-truth label.
random_lst_img_cat2 = random.sample(lst_img_cat2, 6)
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(12, 6))
for img_id, ax in zip(random_lst_img_cat2, axes.flat):
    # ground-truth label from the competition csv
    img_label = train_features_label.loc[img_id]['label']
    # detection image (bbox drawn in) as a numpy array
    img = mpimg.imread(path_img_mega + str(img_id).upper() + '_detections.jpg')
    # Megadetector category and confidence for this image
    cat = df_images_clean.loc[img_id]['category']
    conf = df_images_clean.loc[img_id]['max_detection_conf']
    ax.imshow(img)
    # ':.2f' — the original ':2f' was a broken format spec (min width 2,
    # full float precision) instead of two decimal places
    ax.set_title(f"{img_id} | {img_label} | {cat} | Conf: {conf:.2f}", fontsize=8)
plt.tight_layout()
plt.show()
Random Ansicht vehicle
# Show six random 'vehicle' detections together with their ground-truth label.
random_lst_img_cat3 = random.sample(lst_img_cat3, 6)
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(12, 6))
for img_id, ax in zip(random_lst_img_cat3, axes.flat):
    # ground-truth label from the competition csv
    img_label = train_features_label.loc[img_id]['label']
    # detection image (bbox drawn in) as a numpy array
    img = mpimg.imread(path_img_mega + str(img_id).upper() + '_detections.jpg')
    # Megadetector category and confidence for this image
    cat = df_images_clean.loc[img_id]['category']
    conf = df_images_clean.loc[img_id]['max_detection_conf']
    ax.imshow(img)
    # ':.2f' — the original ':2f' was a broken format spec
    ax.set_title(f"{img_id} | {img_label} | {cat} | Conf: {conf:.2f}", fontsize=8)
plt.tight_layout()
plt.show()
# animal
# Show twelve random 'animal' detections together with their ground-truth label.
random_lst_img_cat1 = random.sample(lst_img_cat1, 12)
fig, axes = plt.subplots(nrows=4, ncols=3, figsize=(15, 15))
for img_id, ax in zip(random_lst_img_cat1, axes.flat):
    # ground-truth label from the competition csv
    img_label = train_features_label.loc[img_id]['label']
    # detection image (bbox drawn in) as a numpy array
    img = mpimg.imread(path_img_mega + str(img_id).upper() + '_detections.jpg')
    # Megadetector category and confidence for this image
    cat = df_images_clean.loc[img_id]['category']
    conf = df_images_clean.loc[img_id]['max_detection_conf']
    ax.imshow(img)
    # ':.2f' — the original ':2f' was a broken format spec
    ax.set_title(f"{img_id} | {img_label} | {cat} | Conf: {conf:.2f}", fontsize=8)
fig.tight_layout()
plt.show()
# Manually collected examples where the Megadetector category looks wrong.
missclassification_findings = ['ZJ010981', 'ZJ014854', 'ZJ003542']
random_miss_class= random.sample(missclassification_findings, 3)
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(12, 12))
for img_id, ax in zip(random_miss_class, axes.flat):
    # ground-truth label from the competition csv
    img_label = train_features_label.loc[img_id]['label']
    # detection image (bbox drawn in) as a numpy array
    img = mpimg.imread(path_img_mega + str(img_id).upper() + '_detections.jpg')
    # Megadetector category and confidence for this image
    cat = df_images_clean.loc[img_id]['category']
    conf = df_images_clean.loc[img_id]['max_detection_conf']
    ax.imshow(img)
    # ':.2f' — the original ':2f' was a broken format spec
    ax.set_title(f"{img_id} | {img_label} | {cat} | Conf: {conf:.2f}", fontsize=8)
plt.tight_layout()
plt.show()
def get_df_bbox_label(label_path:str="../eda/train_features_label.csv", threshold_bbox:float=0.4, filter_th=True):
    '''
    label_path: path for images and label csv
    threshold_bbox: set confidence threshold
    Cleans the Megadetector JSON file, removes bounding boxes whose
    confidence is below the threshold and returns a dataframe with image
    name, labels and bounding boxes.
    '''
    labels = pd.read_csv(label_path, index_col='id')
    detections = get_clean_dataframe_from_json(threshold_bbox, save_csv=False, filter_th=filter_th)
    # left join on the image-name index keeps every labelled image
    merged = labels.merge(detections, left_index=True, right_index=True, how='left')
    # re-derive the flag: images filtered out above have no bbox anymore
    merged['bbox_true'] = merged['bbox_normalized'].notnull()
    return merged
# test threshold
threshold = 0.4
df_images_th04_label = get_df_bbox_label(threshold_bbox=threshold, filter_th=True)
# NOTE(review): display() is an IPython/Jupyter builtin, unavailable in a plain script
display(df_images_th04_label)
| filepath | site | label | file | max_detection_conf | category | conf | bbox | bbox_normalized | bbox_true | bbox_im_size_224 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| id | |||||||||||
| ZJ000000 | train_features/ZJ000000.jpg | S0120 | bird | ZJ000000.jpg | 0.690 | animal | 0.690 | (322, 109, 92, 69) | [0.1093, 0.5888, 0.0802, 0.1851] | True | (136, 28, 33, 10) |
| ZJ000001 | train_features/ZJ000001.jpg | S0069 | monkey_prosimian | ZJ000001.jpg | 0.813 | animal | 0.813 | (4, 542, 515, 120) | [0.5604, 0, 0.1333, 0.9685] | True | (4, 130, 209, 22) |
| ZJ000002 | train_features/ZJ000002.jpg | S0009 | bird | ZJ000002.jpg | 0.612 | animal | 0.612 | (284, 330, 56, 74) | [0.5093, 0.7777, 0.1281, 0.1777] | True | (178, 118, 32, 21) |
| ZJ000003 | train_features/ZJ000003.jpg | S0008 | monkey_prosimian | ZJ000003.jpg | 0.686 | animal | 0.686 | (230, 4, 107, 129) | [0, 0.6277, 0.214, 0.3194] | True | (145, 4, 64, 40) |
| ZJ000004 | train_features/ZJ000004.jpg | S0036 | leopard | ZJ000004.jpg | 0.476 | animal | 0.476 | (194, 326, 91, 66) | [0.5031, 0.5671, 0.1156, 0.2955] | True | (131, 117, 58, 18) |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| ZJ016483 | train_features/ZJ016483.jpg | S0093 | blank | NaN | NaN | NaN | NaN | NaN | NaN | False | NaN |
| ZJ016484 | train_features/ZJ016484.jpg | S0043 | leopard | ZJ016484.jpg | 0.528 | animal | 0.528 | (4, 399, 335, 237) | [0.6171, 0, 0.3828, 0.9527] | True | (4, 142, 205, 78) |
| ZJ016485 | train_features/ZJ016485.jpg | S0089 | civet_genet | NaN | NaN | NaN | NaN | NaN | NaN | False | NaN |
| ZJ016486 | train_features/ZJ016486.jpg | S0095 | bird | NaN | NaN | NaN | NaN | NaN | NaN | False | NaN |
| ZJ016487 | train_features/ZJ016487.jpg | S0021 | civet_genet | ZJ016487.jpg | 0.896 | animal | 0.896 | (184, 431, 142, 205) | [0.6671, 0.4999, 0.3328, 0.4166] | True | (116, 153, 85, 67) |
16488 rows × 11 columns
Es ist schwierig, die Qualität der Bounding Boxen quantitativ zu prüfen. Folgend wird getestet, ob für die einzelnen Tierklassen eine Bounding Box zu verschiedenen Thresholds vorhanden ist.
def count_bbox_th_class(animal_class, thresholds:list):
    '''
    animal_class: class
    thresholds: list with thresholds to be tested
    Filters the labelled frame for the class and collects, per threshold,
    the relative share [%] of images with and without a bounding box.
    Returns (bbox_true_list, bbox_false_list).
    '''
    bbox_true_list = []
    bbox_false_list = []
    for th in thresholds:
        df_images_th_label = get_df_bbox_label(threshold_bbox=th)
        df_images_class = df_images_th_label[df_images_th_label['label'] == animal_class]
        relative_count = df_images_class['bbox_true'].value_counts(normalize=True)
        # Label-based access instead of the original relative_count[0]/[1]:
        # on a boolean-indexed Series, s[0] resolves to the *label* False in
        # modern pandas (False == 0), silently swapping the two shares, and
        # s[1] raised KeyError for classes where one outcome never occurs.
        # .get(label, 0.0) is unambiguous and robust to missing outcomes.
        bbox_true_list.append(relative_count.get(True, 0.0) * 100)
        bbox_false_list.append(relative_count.get(False, 0.0) * 100)
    return bbox_true_list, bbox_false_list
def plot_count_bbox_th_class(animal_class, thresholds, figsize=(6,4), save_plot=False):
    '''
    Plot the relative share of images of one class that still have a bbox
    at each threshold.

    animal_class: class label to plot
    thresholds: list of thresholds (ascending), also used for the shaded spans
    figsize: figure size
    save_plot: save the figure under ./plots/ if True
    '''
    class_bbox_true, class_bbox_false = count_bbox_th_class(animal_class, thresholds)
    figure = plt.figure(figsize=figsize)
    plt.plot(thresholds, class_bbox_true)
    plt.suptitle('Relativer Count für Bbox zu verschiedenen Thresholds', fontsize=12)
    plt.title(f'Klasse {animal_class}', fontsize=10)
    plt.xlabel('thresholds', fontsize=8)
    plt.ylabel('relativer Anteil [%]', fontsize=10)
    # shade the ranges outside the interesting 0.4-0.7 threshold window
    plt.axvspan(thresholds[0], 0.4, facecolor='gray', alpha=0.2)
    plt.axvspan(0.7, thresholds[-1], facecolor='gray', alpha=0.2)
    plt.grid()
    plt.tight_layout()
    # bug fix: the original tested the global save_plots, ignoring the
    # save_plot parameter entirely
    if save_plot: plt.savefig(f'./plots/rel_count_bbox_{animal_class}.png')
    plt.show()
def plot_count_bbox_th_class_all(thresholds, figsize=(12,6), save_plot=False):
    '''
    Plot the relative bbox share per threshold for every animal class in
    one figure.

    thresholds: list of thresholds (ascending), also used for the shaded spans
    figsize: figure size
    save_plot: save the figure under ./plots/ if True
    '''
    classes = ['bird', 'monkey_prosimian', 'leopard', 'hog', 'civet_genet',
               'antelope_duiker', 'blank', 'rodent']
    figure = plt.figure(figsize=figsize)
    for animal_class in classes:
        class_bbox_true, _ = count_bbox_th_class(animal_class, thresholds)
        plt.plot(thresholds, class_bbox_true, label=f'{animal_class}')
    plt.suptitle('Relativer Count für Bbox zu verschiedenen Tresholds', fontsize=12)
    plt.xlabel('thresholds', fontsize=8)
    plt.ylabel('relativer Anteil [%]', fontsize=10)
    plt.legend(loc='upper right', bbox_to_anchor=(1.21, 1))
    # shade the ranges outside the interesting 0.4-0.7 threshold window
    plt.axvspan(thresholds[0], 0.4, facecolor='gray', alpha=0.5)
    plt.axvspan(0.7, thresholds[-1], facecolor='gray', alpha=0.2)
    plt.grid()
    plt.tight_layout()
    # bug fix: the original tested the global save_plots, ignoring the
    # save_plot parameter entirely
    if save_plot: plt.savefig('./plots/rel_count_bbox_all_classes.png')
    plt.show()
blank
# runtime: ca. 6min
#thresholds = [0.4, 0.5, 0.6, 0.7]
thresholds = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
# relative bbox share per threshold for the 'blank' class
plot_count_bbox_th_class('blank', thresholds, save_plot=save_plots)
# test plots
df_images_th_label = get_df_bbox_label(threshold_bbox=0.7)
df_images_class = df_images_th_label[df_images_th_label['label'] == 'blank']
# share of blank images with / without a predicted bbox at threshold 0.7
df_images_class['bbox_true'].value_counts(normalize=True).sort_values(ascending=False).sort_index(ascending=False)
True 0.006778 False 0.993222 Name: bbox_true, dtype: float64
Der Megadetector erkennt die blank Bilder sehr gut. Mit einem Threshold von 0.7 werden nur zu 0.67% Bounding Boxen vorhergesagt, somit werden zu 99.3% keine Tiere auf Bildern der Klasse blank erkannt.
alle Klassen
# runtime: ca. 15min
thresholds = [0.4, 0.5, 0.6, 0.7]
#thresholds = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
# relative bbox share per threshold, one curve per animal class
plot_count_bbox_th_class_all(thresholds, save_plot=save_plots)
Das Erkennen der einzelnen Tierklassen ist oben abgebildet. Wie zu erwarten sinkt die Anzahl der Bounding Boxen mit steigendem Threshold. Ein guter Threshold scheint zwischen 0.4 und 0.7 zu liegen.
def crop_image_to_normalized_bbox(image_path, normalized_bbox):
    '''
    Crop an image to a normalized Megadetector bbox.

    image_path: path to the image file
    normalized_bbox: [x, y, w, h] as fractions of the image size
    Returns the cropped region as a PIL image.

    Reuses transform_bbox for the pixel conversion so the coordinates
    (including the 4 px shrink margin) stay consistent with the 'bbox'
    dataframe columns — the original duplicated the same arithmetic inline.
    '''
    image = Image.open(image_path)
    # identical pixel conversion as used for the dataframe's 'bbox' column
    y, x, height, width = transform_bbox(image=image, normalized_bbox=normalized_bbox)
    to_tensor = transforms.ToTensor()
    image_tensor = to_tensor(image)
    # tensor layout is (channels, rows=y, cols=x)
    cropped_image_tensor = image_tensor[:, y:y+height, x:x+width]
    to_pil = transforms.ToPILImage()
    cropped_image = to_pil(cropped_image_tensor)
    return cropped_image
# example: crop the first training detection image to its bbox
image_path = "../megadetector/train_features_detection_th01/zj000000_detections.jpg"
image = Image.open(image_path).convert("RGB")
normalized_bbox = (0.1093, 0.5888, 0.0802, 0.1851)
cropped_image = crop_image_to_normalized_bbox(image_path, normalized_bbox)
# NOTE(review): display() is an IPython/Jupyter builtin; use image.show() in scripts
display(image)
display(cropped_image)